Stat. 651: Project

Author

VAMSHI REDDY MADEM, DEEPAK GUGULLA

Published

December 1, 2023

# libraries
library(cluster)
library(ggplot2)
library(plotly)

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
library(leaflet)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(sf)
Warning: package 'sf' was built under R version 4.4.1
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0     ✔ stringr 1.5.1
✔ purrr   1.0.2     ✔ tibble  3.2.1
✔ readr   2.1.5     ✔ tidyr   1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks plotly::filter(), stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(statebins)
library(viridis)
Loading required package: viridisLite
library(tibble)
# Convert the votes.repub dataset to a data frame
votes_tidy <- as.data.frame(votes.repub)

# Add a column for State names
votes_tidy <- votes_tidy %>%
  rownames_to_column(var = "State") %>%
  pivot_longer(cols = -State, names_to = "Year", values_to = "Percentage")

# Clean the "Year" column to remove "X" prefix and convert to numeric
votes_tidy <- votes_tidy %>%
  mutate(Year = as.numeric(gsub("X", "", Year)),
         Date = ymd(paste0(Year, "-01-01")))
# Filter data for the year 1976
votes_1976 <- votes_tidy[votes_tidy$Year == 1976, ]

# Handle missing percentages (if any)
votes_1976 <- votes_1976 %>% drop_na(Percentage)
statebins(votes_1976, state_col = "State", value_col = "Percentage",
          palette = "Blues") +
  labs(title = "Republican Vote Percentage by State (1976)",
       fill = "Percentage") +
  theme_statebins(legend_position = "bottom")

state_data2 <- votes_1976 |>
mutate(state = str_to_lower(State))
states <- map_data("state")
states <- states |> rename(state = region)
states_joined <- left_join(states, state_data2, by = "state")
p0 <- ggplot(data = states_joined, aes(x = long, y = lat, group = group)) +
geom_polygon(aes(fill = Percentage), color = "black") +
coord_quickmap() +
scale_fill_gradient(name = "Vote Percentage", low = "yellow", high = "red", na.value = "gray") +
theme_minimal() +
labs(
title = "Republicians voting percentage, 1976",
fill = ""
)

# Convert to an interactive plotly map
interactive_map <- ggplotly(p0)

# Show the interactive map
interactive_map
# Create a statebin graph with enhancements
 statebins(votes_1976, state_col = "State", value_col = "Percentage") +
  scale_fill_gradient(low = "lightblue", high = "darkblue", na.value = "gray90") + # Gradient for clarity
  labs(
    title = "Vote Percentage by State (1976)",
    subtitle = "Visualizing voting percentages across states",
    fill = "Vote %"
  ) +
  theme_statebins(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5)
  )
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

library(plotly)
library(dplyr)

# Average Voting Percentage by Year
yearly_avg <- votes_tidy %>%
  group_by(Year) %>%
  summarise(
    Mean_Percentage = mean(Percentage, na.rm = TRUE),
    Median_Percentage = median(Percentage, na.rm = TRUE)
  )

# Interactive Line Plot
p1 <- plot_ly(yearly_avg, x = ~Year, y = ~Mean_Percentage, 
              type = 'scatter', mode = 'lines+markers',
              name = 'Mean Voting %') %>%
  add_trace(y = ~Median_Percentage, name = 'Median Voting %') %>%
  layout(
    title = 'Voting Percentages Over Time',
    xaxis = list(title = 'Year'),
    yaxis = list(title = 'Voting Percentage'),
    hovermode = 'compare'
  )

p1
# Top and Bottom States by Average Voting Percentage
state_avg <- votes_tidy %>%
  group_by(State) %>%
  summarise(Avg_Percentage = mean(Percentage, na.rm = TRUE)) %>%
  arrange(desc(Avg_Percentage))

top_bottom_states <- bind_rows(
  head(state_avg, 5),
  tail(state_avg, 5)
)

p5 <- plot_ly(top_bottom_states, x = ~State, y = ~Avg_Percentage, 
              type = 'bar', 
              color = ~State,
              text = ~round(Avg_Percentage, 2),
              textposition = 'auto') %>%
  layout(
    title = 'Top 5 and Bottom 5 States by Average Voting Percentage',
    xaxis = list(title = 'State', tickangle = -45),
    yaxis = list(title = 'Average Voting Percentage')
  )

p5
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
# Interactive Box Plot of Voting Percentages by State
p2 <- plot_ly(votes_tidy, x = ~State, y = ~Percentage, 
              type = 'box', 
              color = ~State) %>%
  layout(
    title = 'Voting Percentage Distribution by State',
    xaxis = list(title = 'State', tickangle = -45),
    yaxis = list(title = 'Voting Percentage')
  )

p2
Warning: Ignoring 217 observations
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
library(plotly)
library(dplyr)

# Prepare the data
voting_data_processed <- votes_tidy %>%
  group_by(State, Year) %>%
  summarise(
    Avg_Percentage = mean(Percentage, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  # Sort by average percentage to create meaningful ordering
  group_by(State) %>%
  mutate(
    Mean_State_Percentage = mean(Avg_Percentage, na.rm = TRUE),
    Trend_Direction = ifelse(
      lm(Avg_Percentage ~ Year)$coefficients[2] > 0, 
      "Increasing", 
      "Decreasing"
    )
  ) %>%
  ungroup()

# Create a more sophisticated heatmap
p <- plot_ly(
  data = voting_data_processed,
  x = ~Year,
  y = ~reorder(State, Mean_State_Percentage),  # Order states by mean percentage
  z = ~Avg_Percentage,
  type = 'heatmap',
  colors = colorRamp(c("#0000FF", "#FFFFFF", "#FF0000")),
  text = ~paste(
    "State: ", State,
    "<br>Year: ", Year,
    "<br>Voting %: ", round(Avg_Percentage, 2),
    "<br>Overall Trend: ", Trend_Direction
  ),
  hoverinfo = 'text'
) %>%
  layout(
    title = "Voting Percentage Evolution: State by State",
    xaxis = list(
      title = "Year", 
      tickmode = 'linear',
      dtick = 10  # Show every 10 years
    ),
    yaxis = list(
      title = "States (Ordered by Mean Voting %)",
      tickfont = list(size = 8)  # Smaller font for state names
    ),
    margin = list(l = 150)  # Wider left margin for state names
  )

# Display the plot
p